Importing Data

In [1]:
In [2]:
Out[2]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197 van
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199 van
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196 car
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207 van
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183 bus

Data Preprocessing

In [3]:
Out[3]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
compactness 1.000000 0.689786 0.791707 0.691081 0.091779 0.148249 0.812770 -0.788736 0.814248 0.676143 0.764361 0.818674 0.585845 -0.250603 0.236685 0.157670 0.298528 0.365552
circularity 0.689786 1.000000 0.797180 0.625051 0.154283 0.251407 0.858265 -0.827246 0.856603 0.965729 0.806791 0.850863 0.935950 0.053080 0.144968 -0.011869 -0.106339 0.045652
distance_circularity 0.791707 0.797180 1.000000 0.771748 0.158684 0.264621 0.907949 -0.913020 0.896273 0.775149 0.865710 0.890541 0.706950 -0.227001 0.114665 0.266049 0.146027 0.333648
radius_ratio 0.691081 0.625051 0.771748 1.000000 0.665363 0.450486 0.738480 -0.792946 0.712744 0.571083 0.798294 0.725598 0.541325 -0.181520 0.049112 0.174469 0.382912 0.472339
pr.axis_aspect_ratio 0.091779 0.154283 0.158684 0.665363 1.000000 0.648861 0.103832 -0.183492 0.079566 0.127322 0.273738 0.089750 0.122454 0.152860 -0.058539 -0.032180 0.240201 0.267760
max.length_aspect_ratio 0.148249 0.251407 0.264621 0.450486 0.648861 1.000000 0.165998 -0.180053 0.161603 0.305943 0.319033 0.143745 0.189752 0.295638 0.015446 0.043491 -0.026184 0.143919
scatter_ratio 0.812770 0.858265 0.907949 0.738480 0.103832 0.165998 1.000000 -0.973504 0.992078 0.810017 0.951672 0.996328 0.800577 -0.028006 0.074376 0.213512 0.005171 0.118504
elongatedness -0.788736 -0.827246 -0.913020 -0.792946 -0.183492 -0.180053 -0.973504 1.000000 -0.950405 -0.776150 -0.938313 -0.956488 -0.766671 0.103535 -0.052243 -0.186027 -0.114846 -0.216769
pr.axis_rectangularity 0.814248 0.856603 0.896273 0.712744 0.079566 0.161603 0.992078 -0.950405 1.000000 0.813135 0.938182 0.992316 0.798522 -0.015711 0.083219 0.215200 -0.019066 0.099481
max.length_rectangularity 0.676143 0.965729 0.775149 0.571083 0.127322 0.305943 0.810017 -0.776150 0.813135 1.000000 0.746657 0.797485 0.866554 0.041283 0.136077 0.001660 -0.104437 0.076770
scaled_variance 0.764361 0.806791 0.865710 0.798294 0.273738 0.319033 0.951672 -0.938313 0.938182 0.746657 1.000000 0.949766 0.781016 0.112452 0.036165 0.196202 0.014434 0.086708
scaled_variance.1 0.818674 0.850863 0.890541 0.725598 0.089750 0.143745 0.996328 -0.956488 0.992316 0.797485 0.949766 1.000000 0.797318 -0.016642 0.077288 0.202398 0.006648 0.103839
scaled_radius_of_gyration 0.585845 0.935950 0.706950 0.541325 0.122454 0.189752 0.800577 -0.766671 0.798522 0.866554 0.781016 0.797318 1.000000 0.192245 0.166785 -0.056067 -0.225882 -0.118597
scaled_radius_of_gyration.1 -0.250603 0.053080 -0.227001 -0.181520 0.152860 0.295638 -0.028006 0.103535 -0.015711 0.041283 0.112452 -0.016642 0.192245 1.000000 -0.088736 -0.126686 -0.752437 -0.804793
skewness_about 0.236685 0.144968 0.114665 0.049112 -0.058539 0.015446 0.074376 -0.052243 0.083219 0.136077 0.036165 0.077288 0.166785 -0.088736 1.000000 -0.035154 0.115728 0.097293
skewness_about.1 0.157670 -0.011869 0.266049 0.174469 -0.032180 0.043491 0.213512 -0.186027 0.215200 0.001660 0.196202 0.202398 -0.056067 -0.126686 -0.035154 1.000000 0.077460 0.205115
skewness_about.2 0.298528 -0.106339 0.146027 0.382912 0.240201 -0.026184 0.005171 -0.114846 -0.019066 -0.104437 0.014434 0.006648 -0.225882 -0.752437 0.115728 0.077460 1.000000 0.893869
hollows_ratio 0.365552 0.045652 0.333648 0.472339 0.267760 0.143919 0.118504 -0.216769 0.099481 0.076770 0.086708 0.103839 -0.118597 -0.804793 0.097293 0.205115 0.893869 1.000000
In [4]:
Out[4]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
count 846.000000 841.000000 842.000000 840.000000 844.000000 846.000000 845.000000 845.000000 843.000000 846.000000 843.000000 844.000000 844.000000 842.000000 840.000000 845.000000 845.000000 846.000000
mean 93.678487 44.828775 82.110451 168.888095 61.678910 8.567376 168.901775 40.933728 20.582444 147.998818 188.631079 439.494076 174.709716 72.447743 6.364286 12.602367 188.919527 195.632388
std 8.234474 6.152172 15.778292 33.520198 7.891463 4.601217 33.214848 7.816186 2.592933 14.515652 31.411004 176.666903 32.584808 7.486190 4.920649 8.936081 6.155809 7.438797
min 73.000000 33.000000 40.000000 104.000000 47.000000 2.000000 112.000000 26.000000 17.000000 118.000000 130.000000 184.000000 109.000000 59.000000 0.000000 0.000000 176.000000 181.000000
25% 87.000000 40.000000 70.000000 141.000000 57.000000 7.000000 147.000000 33.000000 19.000000 137.000000 167.000000 318.000000 149.000000 67.000000 2.000000 5.000000 184.000000 190.250000
50% 93.000000 44.000000 80.000000 167.000000 61.000000 8.000000 157.000000 43.000000 20.000000 146.000000 179.000000 363.500000 173.500000 71.500000 6.000000 11.000000 188.000000 197.000000
75% 100.000000 49.000000 98.000000 195.000000 65.000000 10.000000 198.000000 46.000000 23.000000 159.000000 217.000000 587.000000 198.000000 75.000000 9.000000 19.000000 193.000000 201.000000
max 119.000000 59.000000 112.000000 333.000000 138.000000 55.000000 265.000000 61.000000 29.000000 188.000000 320.000000 1018.000000 268.000000 135.000000 22.000000 41.000000 206.000000 211.000000
In [5]:
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\nonparametric\kde.py:447: RuntimeWarning: invalid value encountered in greater
  X = X[np.logical_and(X > clip[0], X < clip[1])] # won't work for two columns.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\nonparametric\kde.py:447: RuntimeWarning: invalid value encountered in less
  X = X[np.logical_and(X > clip[0], X < clip[1])] # won't work for two columns.
In [6]:
In [7]:
Out[7]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
count 846.000000 841.000000 842.000000 840.000000 844.000000 846.000000 845.000000 845.000000 843.000000 846.000000 843.000000 844.000000 844.000000 842.000000 840.000000 845.000000 845.000000 846.000000 846.000000
mean 93.678487 44.828775 82.110451 168.888095 61.678910 8.567376 168.901775 40.933728 20.582444 147.998818 188.631079 439.494076 174.709716 72.447743 6.364286 12.602367 188.919527 195.632388 2.271868
std 8.234474 6.152172 15.778292 33.520198 7.891463 4.601217 33.214848 7.816186 2.592933 14.515652 31.411004 176.666903 32.584808 7.486190 4.920649 8.936081 6.155809 7.438797 0.818044
min 73.000000 33.000000 40.000000 104.000000 47.000000 2.000000 112.000000 26.000000 17.000000 118.000000 130.000000 184.000000 109.000000 59.000000 0.000000 0.000000 176.000000 181.000000 1.000000
25% 87.000000 40.000000 70.000000 141.000000 57.000000 7.000000 147.000000 33.000000 19.000000 137.000000 167.000000 318.000000 149.000000 67.000000 2.000000 5.000000 184.000000 190.250000 2.000000
50% 93.000000 44.000000 80.000000 167.000000 61.000000 8.000000 157.000000 43.000000 20.000000 146.000000 179.000000 363.500000 173.500000 71.500000 6.000000 11.000000 188.000000 197.000000 3.000000
75% 100.000000 49.000000 98.000000 195.000000 65.000000 10.000000 198.000000 46.000000 23.000000 159.000000 217.000000 587.000000 198.000000 75.000000 9.000000 19.000000 193.000000 201.000000 3.000000
max 119.000000 59.000000 112.000000 333.000000 138.000000 55.000000 265.000000 61.000000 29.000000 188.000000 320.000000 1018.000000 268.000000 135.000000 22.000000 41.000000 206.000000 211.000000 3.000000
In [8]:
Out[8]:
compactness                     93.0
circularity                     44.0
distance_circularity            80.0
radius_ratio                   167.0
pr.axis_aspect_ratio            61.0
max.length_aspect_ratio          8.0
scatter_ratio                  157.0
elongatedness                   43.0
pr.axis_rectangularity          20.0
max.length_rectangularity      146.0
scaled_variance                179.0
scaled_variance.1              363.5
scaled_radius_of_gyration      173.5
scaled_radius_of_gyration.1     71.5
skewness_about                   6.0
skewness_about.1                11.0
skewness_about.2               188.0
hollows_ratio                  197.0
dtype: float64

Dealing with Outliers and Missing Values

In [9]:
In [10]:
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
In [11]:
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
In [12]:
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
In [15]:
Out[15]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
count 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000 846.000000
mean 93.678487 44.823877 82.100473 168.230496 61.154846 8.111111 168.887707 40.936170 20.580378 147.998818 188.596927 439.314421 174.706856 72.443262 6.361702 12.600473 188.918440 195.632388
std 8.234474 6.134272 15.741569 32.018672 5.613458 2.074801 33.197710 7.811882 2.588558 14.515652 31.360427 176.496341 32.546277 7.468734 4.903244 8.930962 6.152247 7.438797
min 73.000000 33.000000 40.000000 104.000000 47.000000 2.000000 112.000000 26.000000 17.000000 118.000000 130.000000 184.000000 109.000000 59.000000 0.000000 0.000000 176.000000 181.000000
25% 87.000000 40.000000 70.000000 141.000000 57.000000 7.000000 147.000000 33.000000 19.000000 137.000000 167.000000 318.250000 149.000000 67.000000 2.000000 5.000000 184.000000 190.250000
50% 93.000000 44.000000 80.000000 167.000000 61.000000 8.000000 157.000000 43.000000 20.000000 146.000000 179.000000 363.500000 173.500000 71.500000 6.000000 11.000000 188.000000 197.000000
75% 100.000000 49.000000 98.000000 194.000000 65.000000 10.000000 198.000000 46.000000 23.000000 159.000000 217.000000 586.750000 198.000000 75.000000 9.000000 19.000000 193.000000 201.000000
max 119.000000 59.000000 112.000000 250.000000 76.000000 13.000000 265.000000 61.000000 29.000000 188.000000 320.000000 1018.000000 268.000000 135.000000 22.000000 41.000000 206.000000 211.000000
In [16]:

Standardizing Data

In [17]:

Splitting Data

In [18]:

Implementing SVM

In [19]:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
In [20]:
              precision    recall  f1-score   support

           1       0.98      0.98      0.98        53
           2       0.98      0.98      0.98        60
           3       0.99      0.99      0.99       141

    accuracy                           0.99       254
   macro avg       0.99      0.99      0.99       254
weighted avg       0.99      0.99      0.99       254

Accuracy: 0.9881889763779528
Out[20]:
array([[ 52,   1,   0],
       [  0,  59,   1],
       [  1,   0, 140]], dtype=int64)

K-Fold Cross-Validation

In [21]:
Out[21]:
array([0.94117647, 0.95294118, 0.94705882, 0.95857988, 0.95209581])
In [22]:
Accuracy: 0.95 (+/- 0.01)

Applying PCA

In [23]:
[[ 0.27273518  0.28741899  0.30232752  0.27011948  0.09689464  0.19559503
   0.31072282 -0.30890325  0.30755002  0.27825022  0.29676248  0.30688497
   0.26356413 -0.04037111  0.04217391  0.05878818  0.03739285  0.08389378]
 [-0.09402491  0.13257726 -0.04946733 -0.19626984 -0.26136031 -0.11137445
   0.0705384  -0.00837394  0.08256217  0.12324944  0.09259617  0.07752009
   0.21356472  0.48006222 -0.04169996 -0.10051474 -0.50896972 -0.51312981]
 [ 0.07263159  0.19190467 -0.04621247 -0.1126621  -0.06594468  0.20306458
  -0.10502353  0.1056912  -0.09627636  0.21855804 -0.16056044 -0.10705279
   0.18415833 -0.1008689   0.62746259 -0.58199598  0.0454722   0.04921917]
 [ 0.12924861 -0.07540313  0.11541801 -0.24458828 -0.6237589   0.23324969
   0.03143173  0.0453379   0.0567906  -0.00417757 -0.04156598  0.02793803
  -0.11765801 -0.13062663  0.34796787  0.54110943 -0.05623672  0.06510857]
 [ 0.16250793 -0.14182777 -0.0888463   0.13330928  0.08845701 -0.63156104
   0.07899073 -0.0645369   0.07642302 -0.25482915  0.16684589  0.1168687
  -0.00504046  0.16370232  0.5606893   0.10269531  0.18777601 -0.10693811]
 [ 0.23689551 -0.06283367 -0.01550653 -0.15034991 -0.55492977 -0.28721853
   0.1038946  -0.08474375  0.11127167 -0.06923381  0.09890243  0.1427139
  -0.06509349 -0.20013661 -0.3520953  -0.47126749  0.26222663  0.04719469]
 [ 0.2591551  -0.37473332  0.13690977  0.17132755  0.04059489  0.4352907
   0.0490811  -0.06593772  0.04684041 -0.2982151   0.1521157   0.02886298
  -0.46703474  0.29490781  0.08974557 -0.29653525 -0.15724051 -0.07436998]
 [ 0.19325357  0.11440448 -0.15880511 -0.01157118 -0.12252233  0.05897638
  -0.1494669   0.13822565 -0.15467945  0.24557204  0.19197281 -0.143106
   0.01699308  0.66945992 -0.09759643  0.09640604  0.3792008   0.33411534]]
[9.75662909 3.23134049 1.196735   1.15380498 0.87640578 0.6676913
 0.31935505 0.27668731]
[[ 0.58070143 -0.68734308  0.26874387 ... -1.7545482  -0.25838013
  -0.09008   ]
 [-1.50893659 -0.40335631  0.52678927 ... -0.11490211  0.26733187
   0.18909573]
 [ 3.91077207  0.14145812  1.14877533 ... -0.67349972  0.68653145
  -0.37997   ]
 ...
 [ 5.10311221 -0.17171947  0.31439493 ...  0.24336737  0.87232412
  -0.13674276]
 [-3.30484055 -1.00999455 -1.76948991 ...  0.07063498 -0.40494919
  -0.55842163]
 [-4.96003436  0.40647288 -1.15350817 ...  0.40879893 -0.43721497
  -0.13532674]]

Splitting Data

In [24]:

Implementing SVM

In [25]:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
In [26]:
              precision    recall  f1-score   support

           1       0.92      0.92      0.92        53
           2       0.97      0.98      0.98        60
           3       0.98      0.97      0.98       141

    accuracy                           0.96       254
   macro avg       0.96      0.96      0.96       254
weighted avg       0.96      0.96      0.96       254

Accuracy: 0.9645669291338582
Out[26]:
array([[ 49,   2,   2],
       [  0,  59,   1],
       [  4,   0, 137]], dtype=int64)

K-Fold Cross-Validation

In [27]:
Out[27]:
array([0.94117647, 0.95294118, 0.94705882, 0.95857988, 0.95209581])
In [28]:
Accuracy: 0.95 (+/- 0.01)
In [30]: